###########################
## Visualizing partial
## dependencies
###########################

## Get processed data
load("./FullData.RData")
train.data  <- all.data$fData[,-grep("uid|train|fraud",colnames(all.data$fData))]   


## Load samples from pdbart runs
load("./BartPlotObjects.RData")



## Create index of categorical and continuous variables 
## as col number in the training dataset.
## The hard-coded indeces correspond to columns in
## train.data, as defined in line 14 above.
cont.var.index <- (1:7)
cat.var.index <- (8:31)

## Set quantiles for credibility intervals
quants.for.ci <- c(0.1,0.5,0.9)

## CATEGORICAL VARIABLES
## Create data objects suitable for lattice
## graphing
## Retrieve predicted values:
vals <- ldply(cat.var.index
              ,.fun=function(x)quantile(bartFullPlots[[x]]$fd[[1]][,11]
                                        ,probs=quants.for.ci))
vals  <- rbind(vals,quantile(bartFullPlots[[31]]$fd[[1]][,1]
                             ,probs=quants.for.ci)) #Correct for fact that makeind() only created one dummy for instability
names(vals) <- c("Low","Median","High")
## Add corresponding focal variable (i.e. variables for which)
## partial dependency was estimated):
vals$Variable <- c(rep("Regime",length(grep("regime",colnames(train.data))))
                   ,rep("Inequality",length(grep("inequality",colnames(train.data))))
                   ,rep("Average District Magnitude",length(grep("mag",colnames(train.data))))
                   ,rep("Change in GDP",length(grep("gdpchange",colnames(train.data))))
                   ,rep("Commission",length(grep("commission",colnames(train.data))))
                   ,rep("Instability",length(grep("instability",colnames(train.data)))+1)
)
## Add variable categories (categories must be distinct, hence the
## addional whitespaces):
vals$Variable.Cat <- c("Anocracy"
                              ,"Autocracy"
                              ,"N/A"
                              ,"New Dem."
                              ,"Old Dem."
                       ,c("1st Q.  ","2nd Q.  ","3rd Q.  ","4th Q.  ")
                       ,"N/A "
                       ,c("1st Q. ","2nd Q. ","3rd Q. ","4th Q. ")
                       ,"N/A  "
                       ,c("1st Q.","2nd Q.","3rd Q.","4th Q.")
                       ,"N/A   "
                              ,"Gov. only"
                              ,"Mixed"
                              ,"Independent"
                              ,"Stable"
                              ,"Unstable"
)
vals$Variable.Cat <- factor(vals$Variable.Cat, levels=c("Autocracy"
                                                        ,"Anocracy"
                                                        ,"New Dem."
                                                        ,"Old Dem."
                                                        ,"N/A"
                                                        ,c("1st Q.  ","2nd Q.  ","3rd Q.  ","4th Q.  ")
                                                        ,"N/A "
                                                        ,c("1st Q. ","2nd Q. ","3rd Q. ","4th Q. ")
                                                        ,"N/A  "
                                                        ,c("1st Q.","2nd Q.","3rd Q.","4th Q.")
                                                        ,"N/A   "
                                                        ,"Gov. only"
                                                        ,"Mixed"
                                                        ,"Independent"
                                                        ,"Stable"
                                                        ,"Unstable"
))
vals

## Plot using lattice (Figure 5)
mytheme <- standard.theme("pdf", color=FALSE)
mytheme$axis.components$left$tck <- 0.2
mytheme$axis.components$left$pad1 <- .5
mytheme$axis.components$left$pad2 <- 1.75
mytheme$axis.components$bottom$pad1 <- .5
mytheme$axis.components$bottom$pad2 <- 1.75
mytheme$axis.text$cex=.6

cat.vars <- dotplot(Median~Variable.Cat|Variable
                    ,med=vals$Median
                    ,low=vals$Low
                    ,high=vals$High
                    ,data=vals
                    ,layout=c(2,3)
                    ,ylab="Predicted Fraud Score"
                    ,cex=1
                    ,par.settings=mytheme
                    ,horizontal=FALSE
                    ,scales=list(x=list(relation="free",cex=0.8),y=list(relation="free",cex=0.8))
                    ,prepanel=function(x,y,subscripts,med,low,high,...){
                      list(ylim=c(min(low[subscripts]),max(high[subscripts])))
                    }
                    ,panel=function(x,y,subscripts, med,low,high,...){
                      panel.xyplot(x,y,pch=19,...)
                      panel.segments(as.numeric(x)
                                     ,low[subscripts]
                                     , as.numeric(x)
                                     ,high[subscripts]
                                     ,lend=2
                      )
                    }
                    ,par.strip.text=list(lines=1.3)
                    ,strip = function(..., bg,par.strip.text){
                      strip.default(..., bg="gray80"
                                    ,par.strip.text=list(alpha=1
                                                         ,lineheight=1
                                                         ,cex=1
                                                         ,col="black"
                                                         ,font=2
                                                         ,lines=2
                                    ))}
)
postscript("./CatVariablesRef.ps",width=8,height=7)
cat.vars
dev.off()

## CONTINUOUS VARIABLES
## Create data objects suitable for lattice
## graphing
## Retrieve predicted values:
vals.c <- ldply(cont.var.index
                ,.fun=function(x)t(apply(bartFullPlots[[x]]$fd[[1]],2,quantile,probs=quants.for.ci))
)
names(vals.c) <- c("Low","Median","High")
## Add variable names
vals.c$Variable <- rep(c("Uniform Last Digit"
                         ,"Distance Last Pair"
                         ,"Mean Second Digit"
                         ,"Benford 2nd Digit"
                         ,"Ethnolinguistic Fract."
                         ,"Urbanization"
                         ,"Turnout")
                       ,each=11)
vals.c$Variable <- factor(vals.c$Variable
                          ,levels=c("Ethnolinguistic Fract."
                                    ,"Urbanization"
                                    ,"Turnout"
                                    ,"Distance Last Pair"
                                    ,"Uniform Last Digit"
                                    ,"Benford 2nd Digit"
                                    ,"Mean Second Digit"
                          ))
## Add levels at which PD was evaluated
vals.c$Levels <- do.call("c",llply(cont.var.index
                                 ,.fun=function(x)(unlist(bartFullPlots[[x]]$levs))))

## Plot using lattice (Figure 6)
cont.vars <- xyplot(Median~Levels|Variable
                    ,layout=c(2,4)
                    ,med=vals.c$Median
                    ,low=vals.c$Low
                    ,high=vals.c$High
                    ,labs =vals.c$DecileValues
                    ,data=vals.c
                    ,cex=1.2
                    ,as.table=TRUE
                    ,scales=list(x=list(relation="free",cex=0.8),y=list(relation="free",cex=0.8))
                    ,xlab="Observed Range of Variable Values"
                    ,ylab="Predicted Fraud Score"
                    ,par.settings=mytheme
                    ,prepanel=function(x,y,subscripts,med,low,high,...){
                      list(ylim=c(min(low[subscripts]),max(high[subscripts])))
                    }
                    ,panel=function(x,y,subscripts, med,low,high,labs,...){
                      panel.polygon(c(x,rev(x))
                                    ,grid=FALSE
                                    ,c(low[subscripts],rev(high[subscripts]))
                                    ,col="gray70"
                                    ,border=NA)
                      panel.lines(x,y,col="black",lwd=2,...)
                      panel.axis("bottom"
                                 ,labels=labs[subscripts]
                                 ,outside=TRUE
                                 ,half=FALSE
                                 ,rot=0)
                    }
                    ,par.strip.text=list(lines=1.3)
                    ,strip = function(..., bg,par.strip.text){
                      strip.default(..., bg="gray80"
                                    ,par.strip.text=list(alpha=1
                                                         ,lineheight=1.2
                                                         ,cex=1
                                                         ,col="gray20"
                                                         ,font=2
                                    ))}
)
postscript("./ContVariablesRef.ps",width=8,height=8)
cont.vars
dev.off()


